Show the code
library(targets)
library(tidyverse)
library(ggokabeito)
library(easystats)
library(gt)
library(ggfittext)
library(scales)
library(visdat)
library(collapse)
library(ggpubr)
library(knitr)Dieser Arbeitsbericht schildert das technische Vorgehen im Rahmen der Analyse der Matomo-Daten des BMBF-Projekt “HaNS”.
Die Matomo-Klickdaten aller Semester der Projektlaufzeit wurden für diese Analyse verarbeitet. Mit Hilfe einer R-Pipeline wurden eine Reihe von Forschungsfragen analysiert.
Der komplette Code ist online dokumentiert unter https://github.com/sebastiansauer/hans. Aus Datenschutzgründen sind online keine Daten eingestellt.
Die zentrale Analyse-Pipeline-Datei ist https://github.com/sebastiansauer/hans/blob/main/_targets.R.
library(targets)
library(tidyverse)
library(ggokabeito)
library(easystats)
library(gt)
library(ggfittext)
library(scales)
library(visdat)
library(collapse)
library(ggpubr)
library(knitr)theme_set(theme_minimal())options(lubridate.week.start = 1) # Monday as first day
#options(collapse_mask = "all") # use collapse for all dplyr operations
options(chromote.headless = "new") # Chrome headleass needed for gtsavetar_load(data_all_fct)Der Roh-Datensatz verfügt über
Jede Zeile entspricht einem “Visit”.
data_all_fct_head100 <-
data_all_fct %>%
select(1:100) %>%
slice_head(n = 100) data_all_fct_head100 %>%
visdat::vis_dat()data_all_fct_head100 %>%
names() [1] "file_id"
[2] "idvisit"
[3] "visitip"
[4] "visitorid"
[5] "fingerprint"
[6] "actiondetails_0_type"
[7] "actiondetails_0_url"
[8] "actiondetails_0_pageidaction"
[9] "actiondetails_0_idpageview"
[10] "actiondetails_0_servertimepretty"
[11] "actiondetails_0_pageid"
[12] "actiondetails_0_sitesearchkeyword"
[13] "actiondetails_0_sitesearchcount"
[14] "actiondetails_0_pageviewposition"
[15] "actiondetails_0_title"
[16] "actiondetails_0_subtitle"
[17] "actiondetails_0_timestamp"
[18] "sitecurrency"
[19] "sitecurrencysymbol"
[20] "serverdate"
[21] "visitserverhour"
[22] "lastactiontimestamp"
[23] "lastactiondatetime"
[24] "servertimestamp"
[25] "firstactiontimestamp"
[26] "servertimepretty"
[27] "serverdatepretty"
[28] "serverdateprettyfirstaction"
[29] "servertimeprettyfirstaction"
[30] "visitortype"
[31] "visitcount"
[32] "dayssincefirstvisit"
[33] "secondssincefirstvisit"
[34] "visitduration"
[35] "visitdurationpretty"
[36] "searches"
[37] "actions"
[38] "interactions"
[39] "referrertype"
[40] "referrertypename"
[41] "referrername"
[42] "referrerurl"
[43] "referrersearchengineurl"
[44] "languagecode"
[45] "language"
[46] "devicetype"
[47] "devicebrand"
[48] "devicemodel"
[49] "operatingsystem"
[50] "operatingsystemname"
[51] "operatingsystemcode"
[52] "operatingsystemversion"
[53] "browserfamily"
[54] "browserfamilydescription"
[55] "browser"
[56] "browsername"
[57] "browsercode"
[58] "browserversion"
[59] "events"
[60] "continent"
[61] "continentcode"
[62] "country"
[63] "countrycode"
[64] "countryflag"
[65] "region"
[66] "regioncode"
[67] "city"
[68] "location"
[69] "latitude"
[70] "longitude"
[71] "visitlocaltime"
[72] "visitlocalhour"
[73] "dayssincelastvisit"
[74] "secondssincelastvisit"
[75] "resolution"
[76] "plugins"
[77] "provider"
[78] "providername"
[79] "providerurl"
[80] "actiondetails_0_pageloadtime"
[81] "actiondetails_0_timespent"
[82] "actiondetails_0_timespentpretty"
[83] "actiondetails_0_pageloadtimemilliseconds"
[84] "actiondetails_1_type"
[85] "actiondetails_1_url"
[86] "actiondetails_1_pageidaction"
[87] "actiondetails_1_idpageview"
[88] "actiondetails_1_servertimepretty"
[89] "actiondetails_1_pageid"
[90] "actiondetails_1_eventcategory"
[91] "actiondetails_1_eventaction"
[92] "actiondetails_1_pageviewposition"
[93] "actiondetails_1_timestamp"
[94] "actiondetails_1_title"
[95] "actiondetails_1_subtitle"
[96] "actiondetails_2_type"
[97] "actiondetails_2_url"
[98] "actiondetails_2_pageidaction"
[99] "actiondetails_2_idpageview"
[100] "actiondetails_2_servertimepretty"
data_all_fct_head100 %>%
glimpse()Rows: 100
Columns: 100
$ file_id <fct> matomo_export_2023-03-23.csv,…
$ idvisit <fct> 17, 16, 15, 13, 14, 11, 10, 1…
$ visitip <fct> 87.150.0.0, 217.84.0.0, 90.18…
$ visitorid <fct> 9b735f0eb17af6a8, c77ae8b840d…
$ fingerprint <fct> 86408e41c606f8f7, 0584640e01c…
$ actiondetails_0_type <fct> search, action, action, actio…
$ actiondetails_0_url <fct> NA, https://hans.th-nuernberg…
$ actiondetails_0_pageidaction <fct> NA, 2, 2, 3, 2, 9, 3, 4, 2, 4…
$ actiondetails_0_idpageview <fct> NA, PlZbGn, 0f7b07, A6owU2, h…
$ actiondetails_0_servertimepretty <fct> "Mar 23, 2023 21:18:53", "Mar…
$ actiondetails_0_pageid <fct> 325, 324, 323, 164, 176, 145,…
$ actiondetails_0_sitesearchkeyword <fct> "GDI", NA, NA, NA, NA, NA, NA…
$ actiondetails_0_sitesearchcount <fct> 0, NA, NA, NA, NA, NA, NA, NA…
$ actiondetails_0_pageviewposition <fct> 1, 1, 1, 1, 1, 1, 1, NA, 1, N…
$ actiondetails_0_title <fct> Site Search, HAnS, HAnS, HAnS…
$ actiondetails_0_subtitle <fct> "GDI", "https://hans.th-nuern…
$ actiondetails_0_timestamp <fct> 2023-03-23 21:18:53, 2023-03-…
$ sitecurrency <fct> USD, USD, USD, USD, USD, USD,…
$ sitecurrencysymbol <fct> $, $, $, $, $, $, $, $, $, $,…
$ serverdate <fct> 2023-03-23, 2023-03-23, 2023-…
$ visitserverhour <fct> 20, 20, 18, 18, 17, 17, 16, 1…
$ lastactiontimestamp <fct> 2023-03-23 20:18:53, 2023-03-…
$ lastactiondatetime <fct> 2023-03-23 20:18:53, 2023-03-…
$ servertimestamp <fct> 2023-03-23 20:18:53, 2023-03-…
$ firstactiontimestamp <fct> 2023-03-23 20:18:53, 2023-03-…
$ servertimepretty <fct> 21:18:53, 21:01:30, 19:56:41,…
$ serverdatepretty <fct> "Thursday, March 23, 2023", "…
$ serverdateprettyfirstaction <fct> "Thursday, March 23, 2023", "…
$ servertimeprettyfirstaction <fct> 21:18:53, 21:01:30, 19:56:41,…
$ visitortype <fct> new, new, new, returning, new…
$ visitcount <fct> 1, 1, 1, 4, 1, 3, 1, 3, 1, 2,…
$ dayssincefirstvisit <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ secondssincefirstvisit <fct> 0, 0, 0, 15464, 0, 10678, 0, …
$ visitduration <fct> 0, 20, 3, 1494, 0, 2555, 79, …
$ visitdurationpretty <fct> 0s, 20s, 3s, 24 min 54s, 0s, …
$ searches <fct> 1, 0, 0, 7, 0, 2, 1, 0, 0, 9,…
$ actions <fct> 1, 1, 1, 158, 1, 12, 9, 1, 1,…
$ interactions <fct> 1, 1, 1, 38, 1, 7, 5, 0, 1, 5…
$ referrertype <fct> website, website, website, di…
$ referrertypename <fct> Websites, Websites, Websites,…
$ referrername <fct> elearning.ohmportal.de, elear…
$ referrerurl <fct> https://elearning.ohmportal.d…
$ referrersearchengineurl <fct> NA, NA, NA, NA, NA, NA, NA, N…
$ languagecode <fct> de-de, de, de-de, en-us, de-d…
$ language <fct> Language code de-de, German, …
$ devicetype <fct> Desktop, Desktop, Desktop, De…
$ devicebrand <fct> Apple, Unknown, Unknown, Appl…
$ devicemodel <fct> Generic Desktop, Generic Desk…
$ operatingsystem <fct> Mac 10.15, Windows 11, Window…
$ operatingsystemname <fct> Mac, Windows, Windows, Mac, i…
$ operatingsystemcode <fct> MAC, WIN, WIN, MAC, IOS, MAC,…
$ operatingsystemversion <fct> 10.15, 11, 10, 10.15, 16.3, 1…
$ browserfamily <fct> WebKit, Blink, Blink, Gecko, …
$ browserfamilydescription <fct> "WebKit (Safari)", "Blink (Ch…
$ browser <fct> Safari 16.3, Microsoft Edge 1…
$ browsername <fct> Safari, Microsoft Edge, Chrom…
$ browsercode <fct> SF, PS, CH, FF, MF, FF, PS, F…
$ browserversion <fct> 16.3, 111, 111, 111, 16.3, 11…
$ events <fct> 0, 0, 0, 120, 0, 5, 4, 1, 0, …
$ continent <fct> Europe, Europe, Europe, North…
$ continentcode <fct> eur, eur, eur, amn, eur, amn,…
$ country <fct> Germany, Germany, Germany, Un…
$ countrycode <fct> de, de, de, us, de, us, de, u…
$ countryflag <fct> plugins/Morpheus/icons/dist/f…
$ region <fct> NA, NA, NA, NA, NA, NA, NA, N…
$ regioncode <fct> NA, NA, NA, NA, NA, NA, NA, N…
$ city <fct> NA, NA, NA, NA, NA, NA, NA, N…
$ location <fct> "Germany", "Germany", "German…
$ latitude <fct> NA, NA, NA, NA, NA, NA, NA, N…
$ longitude <fct> NA, NA, NA, NA, NA, NA, NA, N…
$ visitlocaltime <fct> 21:18:53, 21:01:29, 19:56:40,…
$ visitlocalhour <fct> 21, 21, 19, 18, 18, 17, 17, 1…
$ dayssincelastvisit <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ secondssincelastvisit <fct> 0, 0, 0, 4786, 0, 7331, 0, 73…
$ resolution <fct> 810x1080, 1536x864, 1920x1080…
$ plugins <fct> "cookie", "cookie, pdf", "coo…
$ provider <fct> NA, NA, NA, NA, NA, NA, NA, N…
$ providername <fct> "Unknown", "Unknown", "Unknow…
$ providerurl <fct> NA, NA, NA, NA, NA, NA, NA, N…
$ actiondetails_0_pageloadtime <fct> NA, 2.23s, 0.55s, NA, 1s, NA,…
$ actiondetails_0_timespent <fct> NA, 20, 0, 3, 0, 25, 4, NA, 0…
$ actiondetails_0_timespentpretty <fct> NA, 20s, 0s, 3s, 0s, 25s, 4s,…
$ actiondetails_0_pageloadtimemilliseconds <fct> NA, 2233, 550, NA, 1001, NA, …
$ actiondetails_1_type <fct> NA, NA, NA, event, NA, event,…
$ actiondetails_1_url <fct> NA, NA, NA, "https://hans.th-…
$ actiondetails_1_pageidaction <fct> NA, NA, NA, 10, NA, 10, 4, NA…
$ actiondetails_1_idpageview <fct> NA, NA, NA, w2vQcb, NA, ZZd1q…
$ actiondetails_1_servertimepretty <fct> NA, NA, NA, "Mar 23, 2023 18:…
$ actiondetails_1_pageid <fct> NA, NA, NA, 165, NA, 147, 143…
$ actiondetails_1_eventcategory <fct> NA, NA, NA, click_button, NA,…
$ actiondetails_1_eventaction <fct> NA, NA, NA, "Medien", NA, "ET…
$ actiondetails_1_pageviewposition <fct> NA, NA, NA, 1, NA, 1, 1, NA, …
$ actiondetails_1_timestamp <fct> NA, NA, NA, 2023-03-23 18:37:…
$ actiondetails_1_title <fct> NA, NA, NA, Event, NA, Event,…
$ actiondetails_1_subtitle <fct> NA, NA, NA, "Category: \"\"cl…
$ actiondetails_2_type <fct> NA, NA, NA, action, NA, searc…
$ actiondetails_2_url <fct> NA, NA, NA, "https://hans.th-…
$ actiondetails_2_pageidaction <fct> NA, NA, NA, 9, NA, NA, 2, NA,…
$ actiondetails_2_idpageview <fct> NA, NA, NA, EGozRT, NA, ZZd1q…
$ actiondetails_2_servertimepretty <fct> NA, NA, NA, "Mar 23, 2023 18:…
tar_load(data_slim_filtered)
data_slim_filtered %>%
slice(1:100) |>
gt()| nr | type | value | idvisit |
|---|---|---|---|
| 0 | subtitle | https://hans.th-nuernberg.de/login | 1 |
| 0 | subtitle | https://hans.th-nuernberg.de/login?evalId=none&user=undefined&role=undefined | 1 |
| 0 | subtitle | https://hans.th-nuernberg.de/ | 1 |
| 0 | subtitle | https://hans.th-nuernberg.de/login?evalId=none&role=undefined | 1 |
| 0 | timestamp | 2023-03-23 14:20:12 | 1 |
| 0 | timestamp | 2024-06-06 11:19:16 | 1 |
| 0 | timestamp | 2022-12-05 15:33:45 | 1 |
| 0 | timestamp | 2023-10-04 16:19:46 | 1 |
| 1 | eventcategory | click_videocard | 1 |
| 1 | eventcategory | login | 1 |
| 1 | eventaction | Einführung in HAnS | 1 |
| 1 | eventaction | success | 1 |
| 1 | timestamp | 2023-03-23 14:20:26 | 1 |
| 1 | timestamp | 2024-06-06 11:21:07 | 1 |
| 1 | timestamp | 2022-12-05 15:33:49 | 1 |
| 1 | timestamp | 2023-10-04 16:19:54 | 1 |
| 1 | subtitle | https://hans.th-nuernberg.de/ | 1 |
| 1 | subtitle | https://hans.th-nuernberg.de/login?evalId=none&user=undefined&role=undefined | 1 |
| 1 | subtitle | Category: ""click_videocard', Action: ""Einführung in HAnS"" | 1 |
| 1 | subtitle | Category: ""login', Action: ""success"" | 1 |
| 2 | subtitle | Category: ""click_toggle', Action: ""Evaluation"" | 1 |
| 2 | subtitle | https://hans.th-nuernberg.de/video-player?uuid=b57c1dfe-a667-48a6-b43d-dac7e600ae8c | 1 |
| 2 | subtitle | https://hans.th-nuernberg.de/?evalId=none&role=developer | 1 |
| 2 | timestamp | 2023-03-23 14:20:28 | 1 |
| 2 | timestamp | 2022-12-05 15:33:49 | 1 |
| 2 | timestamp | 2023-10-04 16:19:54 | 1 |
| 2 | eventcategory | click_toggle | 1 |
| 2 | eventaction | Evaluation | 1 |
| 3 | eventcategory | click_button | 1 |
| 3 | eventcategory | videoplayer_click | 1 |
| 3 | eventcategory | click_button | 1 |
| 3 | eventaction | Kanäle | 1 |
| 3 | eventaction | play | 1 |
| 3 | eventaction | Kanäle | 1 |
| 3 | timestamp | 2023-03-23 14:20:30 | 1 |
| 3 | timestamp | 2022-12-05 15:33:49 | 1 |
| 3 | timestamp | 2023-10-04 16:19:56 | 1 |
| 3 | subtitle | Category: ""click_button', Action: ""Kanäle"" | 1 |
| 3 | subtitle | Category: ""videoplayer_click', Action: ""play"" | 1 |
| 3 | subtitle | Category: ""click_button', Action: ""Kanäle"" | 1 |
| 4 | subtitle | https://hans.th-nuernberg.de/channels | 1 |
| 4 | subtitle | https://hans.th-nuernberg.de/ | 1 |
| 4 | subtitle | https://hans.th-nuernberg.de/channels?evalId=none&role=developer | 1 |
| 4 | timestamp | 2023-03-23 14:20:30 | 1 |
| 4 | timestamp | 2022-12-05 15:33:52 | 1 |
| 4 | timestamp | 2023-10-04 16:19:56 | 1 |
| 5 | eventcategory | click_button | 1 |
| 5 | eventcategory | click_button | 1 |
| 5 | eventaction | Medien | 1 |
| 5 | eventaction | Medien | 1 |
| 5 | timestamp | 2023-03-23 14:20:31 | 1 |
| 5 | timestamp | 2022-12-05 15:33:52 | 1 |
| 5 | timestamp | 2023-10-04 16:21:23 | 1 |
| 5 | subtitle | https://hans.th-nuernberg.de/ | 1 |
| 5 | subtitle | Category: ""click_button', Action: ""Medien"" | 1 |
| 5 | subtitle | Category: ""click_button', Action: ""Medien"" | 1 |
| 5 | pagetitle | HAnS | 1 |
| 6 | eventcategory | click_button | 1 |
| 6 | eventaction | Medien | 1 |
| 6 | timestamp | 2023-03-23 14:20:31 | 1 |
| 6 | timestamp | 2022-12-05 15:55:43 | 1 |
| 6 | timestamp | 2023-10-04 16:21:23 | 1 |
| 6 | subtitle | Category: ""click_button', Action: ""Medien"" | 1 |
| 6 | subtitle | https://hans.th-nuernberg.de/ | 1 |
| 6 | subtitle | https://hans.th-nuernberg.de/?evalId=none&role=developer | 1 |
| 6 | pagetitle | HAnS | 1 |
| 6 | pagetitle | HAnS | 1 |
| 7 | pagetitle | HAnS | 1 |
| 7 | subtitle | Category: ""click_toggle', Action: ""Evaluation"" | 1 |
| 7 | subtitle | https://hans.th-nuernberg.de/ | 1 |
| 7 | subtitle | Category: ""click_button', Action: ""Medien"" | 1 |
| 7 | timestamp | 2023-03-23 14:20:34 | 1 |
| 7 | timestamp | 2022-12-05 16:03:01 | 1 |
| 7 | timestamp | 2023-10-04 16:25:22 | 1 |
| 7 | eventcategory | click_toggle | 1 |
| 7 | eventcategory | click_button | 1 |
| 7 | eventaction | Evaluation | 1 |
| 7 | eventaction | Medien | 1 |
| 8 | subtitle | https://hans.th-nuernberg.de/ | 1 |
| 8 | subtitle | https://hans.th-nuernberg.de/ | 1 |
| 8 | subtitle | Category: ""click_button', Action: ""Kanäle"" | 1 |
| 8 | timestamp | 2023-03-23 14:21:47 | 1 |
| 8 | timestamp | 2022-12-05 16:03:06 | 1 |
| 8 | timestamp | 2023-10-04 16:25:23 | 1 |
| 8 | eventcategory | click_button | 1 |
| 8 | eventaction | Kanäle | 1 |
| 9 | eventcategory | click_toggle | 1 |
| 9 | eventaction | Evaluation | 1 |
| 9 | timestamp | 2023-03-23 14:22:41 | 1 |
| 9 | timestamp | 2022-12-05 16:03:16 | 1 |
| 9 | timestamp | 2023-10-04 16:25:23 | 1 |
| 9 | subtitle | Category: ""click_toggle', Action: ""Evaluation"" | 1 |
| 9 | subtitle | https://hans.th-nuernberg.de/video-player?uuid=b57c1dfe-a667-48a6-b43d-dac7e600ae8c | 1 |
| 9 | subtitle | https://hans.th-nuernberg.de/channels?evalId=none&role=developer | 1 |
| 10 | eventcategory | videoplayer_click | 1 |
| 10 | eventaction | play | 1 |
| 10 | timestamp | 2023-03-23 14:22:45 | 1 |
| 10 | timestamp | 2022-12-05 16:03:16 | 1 |
| 10 | timestamp | 2023-10-04 16:25:24 | 1 |
| 10 | subtitle | https://hans.th-nuernberg.de/channels | 1 |
Entfernt man Developer, Admins und Lecturers aus dem Roh-Datensatz so bleiben weniger Zeilen übrig:
tar_load(data_users_only)tar_load(count_action)tar_load(config)tar_load(time_minmax)time_minmax |>
summarise(time_min = min(time_min, na.rm = T),
time_max = max(time_max, na.rm = T)) |>
gt()| time_min | time_max |
|---|---|
| 2022-12-05 15:33:45 | 2025-03-03 14:08:54 |
Diese Statistik wurde auf Basis des Datenobjekts data_slim_filtered berechnet.
tar_load(time_visit_wday)
tar_load(time_since_last_visit)time_since_last_visit <-
time_since_last_visit |>
mutate(dayssincelastvisit = as.numeric(dayssincelastvisit))
time_since_last_visit |>
datawizard::describe_distribution(dayssincelastvisit) |>
knitr::kable(digits = 2)| Variable | Mean | SD | IQR | Min | Max | Skewness | Kurtosis | n | n_Missing |
|---|---|---|---|---|---|---|---|---|---|
| dayssincelastvisit | 6.3 | 15.38 | 0 | 1 | 92 | 3.32 | 10.84 | 18924 | 162 |
time_since_last_visit |>
ggplot(aes(x=dayssincelastvisit)) +
geom_density()Wie viele Visits (von Hans) gab es?
time_visit_wday_summary <-
time_visit_wday |>
ungroup() |>
mutate(month_start = floor_date(date_time, "month")) |>
mutate(month_name = month(date_time, label = TRUE, abbr = FALSE),
month_num = month(date_time, label = FALSE),
year_num = year(date_time)) |>
group_by(year_num, month_num) |>
summarise(n = n())time_visit_wday_summary |>
gt()| month_num | n |
|---|---|
| 2022 | |
| 12 | 25 |
| 2023 | |
| 1 | 20 |
| 2 | 77 |
| 3 | 99 |
| 4 | 219 |
| 5 | 300 |
| 6 | 246 |
| 7 | 390 |
| 8 | 20 |
| 9 | 27 |
| 10 | 22 |
| 11 | 8 |
| 12 | 98 |
| 2024 | |
| 1 | 552 |
| 2 | 71 |
| 3 | 51 |
| 4 | 94 |
| 5 | 303 |
| 6 | 200 |
| 7 | 192 |
| 8 | 6 |
| 9 | 5 |
| 10 | 8 |
| 11 | 149 |
| 12 | 650 |
| 2025 | |
| 1 | 830 |
| 2 | 131 |
| 3 | 2 |
time_visit_wday_summary2 <-
time_visit_wday |>
ungroup() |>
mutate(month_start = floor_date(date_time, "month")) |>
mutate(month_name = month(date_time, label = TRUE, abbr = FALSE),
month_num = month(date_time, label = FALSE),
year_num = year(date_time)) |>
group_by(year_num, month_start) |>
summarise(n = n())
time_visit_wday_summary2 |>
ggplot(aes(x = month_start, y = n)) +
geom_line(group = 1, color = "grey60") +
geom_point() time_visit_wday_summary2 <-
time_visit_wday |>
ungroup() |>
mutate(month_start = floor_date(date_time, "month")) |>
mutate(month_name = month(date_time, label = TRUE, abbr = FALSE),
month_num = month(date_time, label = FALSE),
year_num = year(date_time)) |>
group_by(year_num, month_start) |>
summarise(n = n()) |>
ungroup() |>
mutate(n_cumsum = cumsum(n))
time_visit_wday_summary2 |>
ggplot(aes(x = month_start, y = n_cumsum)) +
geom_line(group = 1, color = "grey60") +
geom_point() Die folgenden Statistiken beruhen auf dem Datensatz data_slim_filtered:
glimpse(data_slim_filtered)Rows: 3,597,119
Columns: 4
$ nr <int> 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2,…
$ type <fct> subtitle, subtitle, subtitle, subtitle, timestamp, timestamp, …
$ value <chr> "https://hans.th-nuernberg.de/login", "https://hans.th-nuernbe…
$ idvisit <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
nr fasst die Nummer der Aktion innerhalb eines bestimmten Visits.
tbl_count_action <-
count_action |>
describe_distribution(nr_max) |>
gt() |>
fmt_number(columns = where(is.numeric),
decimals = 2)
tbl_count_action| Variable | Mean | SD | IQR | Min | Max | Skewness | Kurtosis | n | n_Missing |
|---|---|---|---|---|---|---|---|---|---|
| nr_max | 129.35 | 152.20 | 161.00 | 1.00 | 499.00 | 1.40 | 0.77 | 4,795.00 | 0.00 |
gtsave(tbl_count_action, filename = "reports/tbl_count_action.png")
nr_max gibt den Maximalwert von nr zurück, sagt also, wie viele Aktionen maximal von einem Visitor ausgeführt wurden.
Betrachtet man die Anzahl der Aktionen pro Visitor näher, so fällt auf, dass der Maximalwert (499) sehr häufig vorkommt:
count_action |>
count(nr_max) |>
ggplot(aes(x = nr_max, y = n)) +
geom_col()Hier noch in einer anderen Darstellung:
count_action |>
count(nr_max) |>
ggplot(aes(x = nr_max, y = n)) +
geom_point()Der Maximalwert ist einfach auffällig häufig:
count_action |>
count(nr_max == 499) |>
gt()| nr_max == 499 | n |
|---|---|
| FALSE | 4386 |
| TRUE | 409 |
Es erscheint plausibel, dass der Maximalwert alle “gekappten” (zensierten, abgeschnittenen) Werte fasst, also viele Werte, die eigentlich größer wären (aber dann zensiert wurden).
count_action2 <-
count_action |>
filter(nr_max != 499)
count_action2 |>
describe_distribution(nr_max) |>
gt() |>
fmt_number(columns = where(is.numeric),
decimals = 2)| Variable | Mean | SD | IQR | Min | Max | Skewness | Kurtosis | n | n_Missing |
|---|---|---|---|---|---|---|---|---|---|
| nr_max | 94.89 | 106.74 | 122.00 | 1.00 | 496.00 | 1.56 | 2.01 | 4,386.00 | 0.00 |
count_action_avg = mean(count_action$nr_max)
count_action_sd = sd(count_action$nr_max)
count_action |>
ggplot() +
geom_histogram(aes(x = nr_max)) +
labs(x = "Anzahl von Aktionen pro Visit",
y = "n",
caption = "Der vertikale Strich zeigt den Mittelwert; der horizontale die SD") +
theme_minimal() +
geom_vline(xintercept = count_action_avg,
color = palette_okabe_ito()[1]) +
geom_segment(x = count_action_avg-count_action_sd,
y = 0,
xend = count_action_avg + count_action_sd,
yend = 0,
color = palette_okabe_ito()[2],
size = 2) +
annotate("label", x = count_action_avg, y = 1500, label = "MW") +
annotate("label", x = count_action_avg + count_action_sd, y = 0, label = "SD") #geom_label(aes(x = count_action_avg), y = 1, label = "Mean")count_action_avg2 = mean(count_action2$nr_max)
count_action_sd2 = sd(count_action2$nr_max)
count_action2 |>
ggplot() +
geom_histogram(aes(x = nr_max)) +
labs(x = "Anzahl von Aktionen pro Visit",
y = "n",
title = "Verteilung der User-Aktionen pro Visit",
caption = "Der vertikale Strich zeigt den Mittelwert; der horizontale die SD") +
theme_minimal() +
geom_vline(xintercept = count_action_avg2,
color = palette_okabe_ito()[1]) +
geom_segment(x = count_action_avg-count_action_sd2,
y = 0,
xend = count_action_avg2 + count_action_sd2,
yend = 0,
color = palette_okabe_ito()[2],
size = 2) +
annotate("label", x = count_action_avg2, y = 1500, label = "MW", vjust = "top") +
annotate("label", x = count_action_avg2 + count_action_sd2, y = 0, label = "SD", vjust = "bottom") #geom_label(aes(x = count_action_avg), y = 1, label = "Mean")Die Visit-Zeit wurde auf 600 Min. trunkiert/begrenzt.
tar_load(time_spent)
tar_load(time_duration)
time_spent <-
time_spent |>
mutate(t_min = as.numeric(time_diff, units = "mins")) |>
filter(t_min < 600)time_spent |>
summarise(
mean_time_diff = round(mean(time_diff), 2),
sd_time_diff = sd(time_diff),
min_time_diff = min(time_diff),
max_time_diff = max(time_diff)
) |>
summarise(
mean_time_diff_avg = mean(mean_time_diff),
sd_time_diff_avg = mean(sd_time_diff, na.rm = TRUE),
min_time_diff_avg = mean(min_time_diff),
max_time_diff_avg = mean(max_time_diff)
) |>
gt() |>
fmt_number(columns = everything(),
decimals = 2)| mean_time_diff_avg | sd_time_diff_avg | min_time_diff_avg | max_time_diff_avg |
|---|---|---|---|
| 142.52 | 0.00 | 142.52 | 142.52 |
tar_load(time_duration)
time_duration |>
summarise(duration_sec_avg = mean(visitduration_sec, na.rm = TRUE)) |>
mutate(duration_min_avg = duration_sec_avg / 60) duration_sec_avg duration_min_avg
1 1289.434 21.49056
time_spent_summary <-
time_spent |>
summarise(
mean_t_min = mean(t_min),
sd_t_min = sd(t_min),
min_t_min = min(t_min),
max_t_min = max(t_min)
) |>
summarise(
mean_t_min_avg = mean(mean_t_min),
sd_t_min_avg = mean(sd_t_min, na.rm = TRUE),
min_t_min_avg = mean(min_t_min),
max_t_min_avg = mean(max_t_min)
)
time_spent_summary |>
gt() |>
fmt_number(columns = everything(),
decimals = 2)| mean_t_min_avg | sd_t_min_avg | min_t_min_avg | max_t_min_avg |
|---|---|---|---|
| 142.52 | 0.00 | 142.52 | 142.52 |
small_padding_theme <- ggpubr::ttheme(
tbody.style = tbody_style(size = 8), # Smaller font size can help
colnames.style = colnames_style(size = 9, face = "bold"),
padding = unit(c(2, 2), "mm") # Reduce horizontal and vertical padding
)ggpubr::ggtexttable(time_spent_summary,
rows = NULL,
theme = small_padding_theme)time_spent |>
ggplot(aes(x = t_min)) +
geom_histogram() +
scale_x_time() +
theme_minimal() +
labs(y = "n",
x = "Verweildauer in HaNS pro Visit in Minuten")time_spent |>
ggplot(aes(x = t_min)) +
geom_histogram(binwidth = 5) +
theme_minimal() +
labs(y = "n",
x = "Verweildauer in Minuten",
title = "Verweildauer in HaNS pro Visit",
caption = "binwidth = 5 Min.")time_spent2 <-
time_spent |>
filter(t_min > 1, t_min < 120)
time_spent2 |>
ggplot(aes(x = t_min)) +
geom_histogram(binwidth = 10) +
theme_minimal() +
labs(y = "n",
x = "Verweildauer in HaNS pro Visit in Minuten",
title = "Verweildauer begrenzt auf 1-120 Minuten",
caption = "bindwidth = 10 Min.")time_spent_by_month <-
time_spent |>
mutate(date = ymd_hms(value)) |>
mutate(month_start = floor_date(date, "month")) |>
mutate(month_name = month(month_start, label = TRUE, abbr = FALSE),
month_num = month(month_start, label = FALSE),
year = year(month_start)) |>
group_by(month_num, year) |>
summarise(time_spent_month_avg = mean(t_min, na.rm = TRUE),
time_spent_month_sd = sd(t_min, na.rm = TRUE)) |>
arrange(year, month_num)
time_spent_by_month |>
gt() |>
fmt_auto()| year | time_spent_month_avg | time_spent_month_sd |
|---|---|---|
| 12 | ||
| 2,022 | 10 | 0 |
| 2,023 | 268.264 | 196.857 |
| 2,024 | 251.396 | 176.941 |
| 2 | ||
| 2,023 | 421 | 180.428 |
| 2,024 | 309.517 | 202.612 |
| 2,025 | 214.55 | 172.93 |
| 3 | ||
| 2,023 | 107.075 | 86.062 |
| 2,025 | 9 | 0 |
| 4 | ||
| 2,023 | 379.868 | 230.16 |
| 2,024 | 112 | 0 |
| 7 | ||
| 2,023 | 187.708 | 160.938 |
| 2,024 | 566.692 | 50.806 |
| 8 | ||
| 2,023 | 129.368 | 105.653 |
| 2,024 | 582 | 0 |
| 9 | ||
| 2,023 | 414.391 | 208.662 |
| 10 | ||
| 2,023 | 181.716 | 115.475 |
| 2,024 | 43.284 | 12.632 |
| 1 | ||
| 2,024 | 170.763 | 141.968 |
| 2,025 | 336.1 | 199.91 |
| 5 | ||
| 2,024 | 268.909 | 130.843 |
| 6 | ||
| 2,024 | 336.303 | 182.84 |
| 11 | ||
| 2,024 | 374.266 | 209.407 |
time_spent_by_month |>
mutate(time_spent_month_avg = round(time_spent_month_avg, 2),
time_spent_month_sd = round(time_spent_month_sd, 2)) |>
ggtexttable()time_spent_by_month_name <-
time_spent |>
mutate(date = ymd_hms(value)) |>
mutate(month_start = floor_date(date, "month")) |>
mutate(month_name = month(month_start, label = TRUE, abbr = FALSE),
month_num = month(month_start, label = FALSE),
year = year(month_start)) |>
group_by(month_start, year) |>
summarise(time_spent_month_avg = mean(t_min, na.rm = TRUE),
time_spent_month_sd = sd(t_min, na.rm = TRUE))
time_spent_by_month_name |>
ggplot(aes(x = month_start, y = time_spent_month_avg)) +
geom_line(group = 1, color = "grey60") +
geom_point() tar_load(count_action_type)count_action_type_counted <-
count_action_type |>
drop_na() |>
count(category, sort = TRUE) |>
mutate(prop = round(n/sum(n), 2))
count_action_type_counted |>
gt()| category | n | prop |
|---|---|---|
| video | 657274 | 0.83 |
| click_slideChange | 61934 | 0.08 |
| visit_page | 40549 | 0.05 |
| Media item | 11046 | 0.01 |
| login | 4148 | 0.01 |
| in_media_search | 3044 | 0.00 |
| Search Results Count | 2668 | 0.00 |
| click_topic | 2623 | 0.00 |
| Medien | 1359 | 0.00 |
| logout | 1283 | 0.00 |
| Kanäle | 1222 | 0.00 |
| GESOA | 971 | 0.00 |
| click_channelcard | 661 | 0.00 |
| Evaluation | 173 | 0.00 |
| Data protection | 35 | 0.00 |
count_action_type_per_month <-
count_action_type |>
select(nr, idvisit, category) |>
ungroup() |>
left_join(time_visit_wday |> ungroup()) |>
select(-c(dow, hour, nr)) |>
drop_na() |>
mutate(month_start = floor_date(date_time, "month")) |>
count(month_start, category)count_action_type_per_month |>
gt()| month_start | category | n |
|---|---|---|
| 2022-12-01 | Evaluation | 2 |
| 2022-12-01 | GESOA | 6 |
| 2022-12-01 | Kanäle | 11 |
| 2022-12-01 | Media item | 208 |
| 2022-12-01 | Medien | 12 |
| 2022-12-01 | Search Results Count | 46 |
| 2022-12-01 | click_channelcard | 10 |
| 2022-12-01 | click_slideChange | 337 |
| 2022-12-01 | click_topic | 13 |
| 2022-12-01 | in_media_search | 9 |
| 2022-12-01 | login | 48 |
| 2022-12-01 | logout | 64 |
| 2022-12-01 | video | 2522 |
| 2022-12-01 | visit_page | 423 |
| 2023-01-01 | GESOA | 4 |
| 2023-01-01 | Kanäle | 2 |
| 2023-01-01 | Medien | 2 |
| 2023-01-01 | Search Results Count | 25 |
| 2023-01-01 | click_channelcard | 4 |
| 2023-01-01 | click_slideChange | 438 |
| 2023-01-01 | click_topic | 19 |
| 2023-01-01 | in_media_search | 7 |
| 2023-01-01 | login | 13 |
| 2023-01-01 | logout | 1 |
| 2023-01-01 | video | 3672 |
| 2023-01-01 | visit_page | 179 |
| 2023-02-01 | Data protection | 1 |
| 2023-02-01 | Kanäle | 7 |
| 2023-02-01 | Medien | 10 |
| 2023-02-01 | Search Results Count | 18 |
| 2023-02-01 | click_slideChange | 1823 |
| 2023-02-01 | click_topic | 3 |
| 2023-02-01 | in_media_search | 73 |
| 2023-02-01 | login | 38 |
| 2023-02-01 | logout | 10 |
| 2023-02-01 | video | 10606 |
| 2023-02-01 | visit_page | 491 |
| 2023-03-01 | Data protection | 3 |
| 2023-03-01 | Evaluation | 20 |
| 2023-03-01 | GESOA | 57 |
| 2023-03-01 | Kanäle | 218 |
| 2023-03-01 | Media item | 655 |
| 2023-03-01 | Medien | 215 |
| 2023-03-01 | Search Results Count | 215 |
| 2023-03-01 | click_channelcard | 102 |
| 2023-03-01 | click_slideChange | 1781 |
| 2023-03-01 | click_topic | 109 |
| 2023-03-01 | in_media_search | 86 |
| 2023-03-01 | login | 210 |
| 2023-03-01 | logout | 219 |
| 2023-03-01 | video | 9870 |
| 2023-03-01 | visit_page | 2394 |
| 2023-04-01 | Data protection | 2 |
| 2023-04-01 | Evaluation | 13 |
| 2023-04-01 | GESOA | 70 |
| 2023-04-01 | Kanäle | 181 |
| 2023-04-01 | Media item | 2796 |
| 2023-04-01 | Medien | 187 |
| 2023-04-01 | Search Results Count | 240 |
| 2023-04-01 | click_channelcard | 109 |
| 2023-04-01 | click_slideChange | 5428 |
| 2023-04-01 | click_topic | 203 |
| 2023-04-01 | in_media_search | 100 |
| 2023-04-01 | login | 364 |
| 2023-04-01 | logout | 251 |
| 2023-04-01 | video | 33235 |
| 2023-04-01 | visit_page | 3626 |
| 2023-05-01 | Evaluation | 24 |
| 2023-05-01 | GESOA | 193 |
| 2023-05-01 | Kanäle | 159 |
| 2023-05-01 | Media item | 228 |
| 2023-05-01 | Medien | 132 |
| 2023-05-01 | Search Results Count | 372 |
| 2023-05-01 | click_channelcard | 62 |
| 2023-05-01 | click_slideChange | 8371 |
| 2023-05-01 | click_topic | 445 |
| 2023-05-01 | in_media_search | 608 |
| 2023-05-01 | login | 320 |
| 2023-05-01 | logout | 91 |
| 2023-05-01 | video | 64184 |
| 2023-05-01 | visit_page | 4564 |
| 2023-06-01 | Evaluation | 14 |
| 2023-06-01 | GESOA | 100 |
| 2023-06-01 | Kanäle | 95 |
| 2023-06-01 | Medien | 60 |
| 2023-06-01 | Search Results Count | 337 |
| 2023-06-01 | click_channelcard | 52 |
| 2023-06-01 | click_slideChange | 10802 |
| 2023-06-01 | click_topic | 280 |
| 2023-06-01 | in_media_search | 536 |
| 2023-06-01 | login | 233 |
| 2023-06-01 | logout | 52 |
| 2023-06-01 | video | 70662 |
| 2023-06-01 | visit_page | 3486 |
| 2023-07-01 | Data protection | 5 |
| 2023-07-01 | Evaluation | 28 |
| 2023-07-01 | GESOA | 79 |
| 2023-07-01 | Kanäle | 147 |
| 2023-07-01 | Media item | 68 |
| 2023-07-01 | Medien | 106 |
| 2023-07-01 | Search Results Count | 264 |
| 2023-07-01 | click_channelcard | 68 |
| 2023-07-01 | click_slideChange | 6355 |
| 2023-07-01 | click_topic | 162 |
| 2023-07-01 | in_media_search | 624 |
| 2023-07-01 | login | 262 |
| 2023-07-01 | logout | 141 |
| 2023-07-01 | video | 68992 |
| 2023-07-01 | visit_page | 3784 |
| 2023-08-01 | Evaluation | 1 |
| 2023-08-01 | GESOA | 2 |
| 2023-08-01 | Kanäle | 10 |
| 2023-08-01 | Medien | 8 |
| 2023-08-01 | Search Results Count | 44 |
| 2023-08-01 | click_channelcard | 16 |
| 2023-08-01 | click_slideChange | 301 |
| 2023-08-01 | click_topic | 4 |
| 2023-08-01 | in_media_search | 1 |
| 2023-08-01 | login | 4 |
| 2023-08-01 | logout | 5 |
| 2023-08-01 | video | 2450 |
| 2023-08-01 | visit_page | 199 |
| 2023-09-01 | Evaluation | 5 |
| 2023-09-01 | GESOA | 4 |
| 2023-09-01 | Kanäle | 22 |
| 2023-09-01 | Medien | 10 |
| 2023-09-01 | Search Results Count | 41 |
| 2023-09-01 | click_channelcard | 19 |
| 2023-09-01 | click_slideChange | 109 |
| 2023-09-01 | click_topic | 15 |
| 2023-09-01 | in_media_search | 13 |
| 2023-09-01 | login | 6 |
| 2023-09-01 | logout | 9 |
| 2023-09-01 | video | 3292 |
| 2023-09-01 | visit_page | 267 |
| 2023-10-01 | Kanäle | 7 |
| 2023-10-01 | Media item | 310 |
| 2023-10-01 | Medien | 16 |
| 2023-10-01 | Search Results Count | 5 |
| 2023-10-01 | click_slideChange | 159 |
| 2023-10-01 | click_topic | 21 |
| 2023-10-01 | in_media_search | 5 |
| 2023-10-01 | login | 39 |
| 2023-10-01 | logout | 44 |
| 2023-10-01 | video | 1233 |
| 2023-10-01 | visit_page | 234 |
| 2023-11-01 | Kanäle | 3 |
| 2023-11-01 | Media item | 68 |
| 2023-11-01 | Medien | 1 |
| 2023-11-01 | Search Results Count | 2 |
| 2023-11-01 | click_channelcard | 8 |
| 2023-11-01 | click_slideChange | 98 |
| 2023-11-01 | click_topic | 1 |
| 2023-11-01 | login | 7 |
| 2023-11-01 | logout | 2 |
| 2023-11-01 | video | 1172 |
| 2023-11-01 | visit_page | 76 |
| 2023-12-01 | GESOA | 4 |
| 2023-12-01 | Kanäle | 3 |
| 2023-12-01 | Medien | 34 |
| 2023-12-01 | Search Results Count | 9 |
| 2023-12-01 | click_channelcard | 3 |
| 2023-12-01 | click_slideChange | 884 |
| 2023-12-01 | click_topic | 6 |
| 2023-12-01 | in_media_search | 6 |
| 2023-12-01 | login | 32 |
| 2023-12-01 | logout | 2 |
| 2023-12-01 | video | 8081 |
| 2023-12-01 | visit_page | 373 |
| 2024-01-01 | Evaluation | 23 |
| 2024-01-01 | GESOA | 60 |
| 2024-01-01 | Kanäle | 45 |
| 2024-01-01 | Media item | 144 |
| 2024-01-01 | Medien | 72 |
| 2024-01-01 | Search Results Count | 156 |
| 2024-01-01 | click_channelcard | 14 |
| 2024-01-01 | click_slideChange | 7781 |
| 2024-01-01 | click_topic | 106 |
| 2024-01-01 | in_media_search | 202 |
| 2024-01-01 | login | 317 |
| 2024-01-01 | logout | 92 |
| 2024-01-01 | video | 51149 |
| 2024-01-01 | visit_page | 2830 |
| 2024-02-01 | Evaluation | 1 |
| 2024-02-01 | GESOA | 3 |
| 2024-02-01 | Kanäle | 3 |
| 2024-02-01 | Medien | 3 |
| 2024-02-01 | Search Results Count | 37 |
| 2024-02-01 | click_channelcard | 1 |
| 2024-02-01 | click_slideChange | 786 |
| 2024-02-01 | click_topic | 8 |
| 2024-02-01 | in_media_search | 7 |
| 2024-02-01 | login | 60 |
| 2024-02-01 | logout | 16 |
| 2024-02-01 | video | 12419 |
| 2024-02-01 | visit_page | 448 |
| 2024-03-01 | Data protection | 1 |
| 2024-03-01 | Evaluation | 4 |
| 2024-03-01 | GESOA | 27 |
| 2024-03-01 | Kanäle | 12 |
| 2024-03-01 | Media item | 781 |
| 2024-03-01 | Medien | 21 |
| 2024-03-01 | Search Results Count | 70 |
| 2024-03-01 | click_channelcard | 13 |
| 2024-03-01 | click_slideChange | 526 |
| 2024-03-01 | click_topic | 78 |
| 2024-03-01 | in_media_search | 8 |
| 2024-03-01 | login | 94 |
| 2024-03-01 | logout | 20 |
| 2024-03-01 | video | 6388 |
| 2024-03-01 | visit_page | 924 |
| 2024-04-01 | Data protection | 2 |
| 2024-04-01 | Evaluation | 2 |
| 2024-04-01 | GESOA | 10 |
| 2024-04-01 | Kanäle | 20 |
| 2024-04-01 | Medien | 31 |
| 2024-04-01 | Search Results Count | 84 |
| 2024-04-01 | click_channelcard | 12 |
| 2024-04-01 | click_slideChange | 1727 |
| 2024-04-01 | click_topic | 140 |
| 2024-04-01 | in_media_search | 94 |
| 2024-04-01 | login | 104 |
| 2024-04-01 | logout | 29 |
| 2024-04-01 | video | 17945 |
| 2024-04-01 | visit_page | 1012 |
| 2024-05-01 | Evaluation | 3 |
| 2024-05-01 | GESOA | 20 |
| 2024-05-01 | Kanäle | 31 |
| 2024-05-01 | Medien | 39 |
| 2024-05-01 | Search Results Count | 187 |
| 2024-05-01 | click_channelcard | 16 |
| 2024-05-01 | click_slideChange | 3029 |
| 2024-05-01 | click_topic | 138 |
| 2024-05-01 | in_media_search | 114 |
| 2024-05-01 | login | 195 |
| 2024-05-01 | logout | 41 |
| 2024-05-01 | video | 39557 |
| 2024-05-01 | visit_page | 1987 |
| 2024-06-01 | Data protection | 6 |
| 2024-06-01 | Evaluation | 7 |
| 2024-06-01 | GESOA | 39 |
| 2024-06-01 | Kanäle | 62 |
| 2024-06-01 | Media item | 818 |
| 2024-06-01 | Medien | 84 |
| 2024-06-01 | Search Results Count | 174 |
| 2024-06-01 | click_channelcard | 47 |
| 2024-06-01 | click_slideChange | 4007 |
| 2024-06-01 | click_topic | 200 |
| 2024-06-01 | in_media_search | 109 |
| 2024-06-01 | login | 272 |
| 2024-06-01 | logout | 60 |
| 2024-06-01 | video | 31850 |
| 2024-06-01 | visit_page | 2390 |
| 2024-07-01 | Data protection | 1 |
| 2024-07-01 | Evaluation | 14 |
| 2024-07-01 | GESOA | 114 |
| 2024-07-01 | Kanäle | 52 |
| 2024-07-01 | Medien | 51 |
| 2024-07-01 | Search Results Count | 216 |
| 2024-07-01 | click_channelcard | 25 |
| 2024-07-01 | click_slideChange | 4700 |
| 2024-07-01 | click_topic | 177 |
| 2024-07-01 | in_media_search | 255 |
| 2024-07-01 | login | 216 |
| 2024-07-01 | logout | 48 |
| 2024-07-01 | video | 48145 |
| 2024-07-01 | visit_page | 2320 |
| 2024-08-01 | Medien | 1 |
| 2024-08-01 | click_slideChange | 2 |
| 2024-08-01 | login | 2 |
| 2024-08-01 | video | 184 |
| 2024-08-01 | visit_page | 29 |
| 2024-09-01 | Data protection | 1 |
| 2024-09-01 | click_slideChange | 3 |
| 2024-09-01 | in_media_search | 8 |
| 2024-09-01 | login | 4 |
| 2024-09-01 | video | 454 |
| 2024-09-01 | visit_page | 28 |
| 2024-10-01 | GESOA | 6 |
| 2024-10-01 | click_slideChange | 107 |
| 2024-10-01 | login | 10 |
| 2024-10-01 | logout | 2 |
| 2024-10-01 | video | 380 |
| 2024-10-01 | visit_page | 36 |
| 2024-11-01 | Data protection | 1 |
| 2024-11-01 | GESOA | 28 |
| 2024-11-01 | Kanäle | 10 |
| 2024-11-01 | Medien | 10 |
| 2024-11-01 | Search Results Count | 3 |
| 2024-11-01 | click_channelcard | 3 |
| 2024-11-01 | click_slideChange | 2380 |
| 2024-11-01 | click_topic | 40 |
| 2024-11-01 | in_media_search | 10 |
| 2024-11-01 | login | 127 |
| 2024-11-01 | logout | 8 |
| 2024-11-01 | video | 9909 |
| 2024-11-01 | visit_page | 658 |
| 2024-12-01 | Data protection | 10 |
| 2024-12-01 | Evaluation | 7 |
| 2024-12-01 | GESOA | 86 |
| 2024-12-01 | Kanäle | 65 |
| 2024-12-01 | Media item | 3099 |
| 2024-12-01 | Medien | 92 |
| 2024-12-01 | Search Results Count | 60 |
| 2024-12-01 | click_channelcard | 43 |
| 2024-12-01 | click_topic | 192 |
| 2024-12-01 | in_media_search | 80 |
| 2024-12-01 | login | 495 |
| 2024-12-01 | logout | 28 |
| 2024-12-01 | video | 61655 |
| 2024-12-01 | visit_page | 3057 |
| 2025-01-01 | Data protection | 2 |
| 2025-01-01 | Evaluation | 3 |
| 2025-01-01 | GESOA | 56 |
| 2025-01-01 | Kanäle | 40 |
| 2025-01-01 | Media item | 907 |
| 2025-01-01 | Medien | 146 |
| 2025-01-01 | Search Results Count | 51 |
| 2025-01-01 | click_channelcard | 23 |
| 2025-01-01 | click_topic | 255 |
| 2025-01-01 | in_media_search | 69 |
| 2025-01-01 | login | 568 |
| 2025-01-01 | logout | 42 |
| 2025-01-01 | video | 87736 |
| 2025-01-01 | visit_page | 4055 |
| 2025-02-01 | Evaluation | 2 |
| 2025-02-01 | GESOA | 3 |
| 2025-02-01 | Kanäle | 17 |
| 2025-02-01 | Media item | 610 |
| 2025-02-01 | Medien | 13 |
| 2025-02-01 | Search Results Count | 12 |
| 2025-02-01 | click_channelcard | 11 |
| 2025-02-01 | click_topic | 8 |
| 2025-02-01 | in_media_search | 20 |
| 2025-02-01 | login | 106 |
| 2025-02-01 | logout | 2 |
| 2025-02-01 | video | 9526 |
| 2025-02-01 | visit_page | 655 |
| 2025-03-01 | Media item | 354 |
| 2025-03-01 | Medien | 3 |
| 2025-03-01 | login | 2 |
| 2025-03-01 | logout | 4 |
| 2025-03-01 | video | 6 |
| 2025-03-01 | visit_page | 24 |
tar_load(time_visit_wday)count_action_type_per_month_top3 <-
count_action_type |>
select(nr, idvisit, category) |>
ungroup() |>
filter(category %in% c("video", "click_slideChange", "visit_page")) |>
left_join(time_visit_wday |> ungroup()) |>
select(-c(dow, hour, nr)) |>
drop_na() |>
mutate(month_start = floor_date(date_time, "month")) |>
count(month_start, category)count_action_type_per_month_top3 |>
gt()| month_start | category | n |
|---|---|---|
| 2022-12-01 | click_slideChange | 337 |
| 2022-12-01 | video | 2522 |
| 2022-12-01 | visit_page | 423 |
| 2023-01-01 | click_slideChange | 438 |
| 2023-01-01 | video | 3672 |
| 2023-01-01 | visit_page | 179 |
| 2023-02-01 | click_slideChange | 1823 |
| 2023-02-01 | video | 10606 |
| 2023-02-01 | visit_page | 491 |
| 2023-03-01 | click_slideChange | 1781 |
| 2023-03-01 | video | 9870 |
| 2023-03-01 | visit_page | 2394 |
| 2023-04-01 | click_slideChange | 5428 |
| 2023-04-01 | video | 33235 |
| 2023-04-01 | visit_page | 3626 |
| 2023-05-01 | click_slideChange | 8371 |
| 2023-05-01 | video | 64184 |
| 2023-05-01 | visit_page | 4564 |
| 2023-06-01 | click_slideChange | 10802 |
| 2023-06-01 | video | 70662 |
| 2023-06-01 | visit_page | 3486 |
| 2023-07-01 | click_slideChange | 6355 |
| 2023-07-01 | video | 68992 |
| 2023-07-01 | visit_page | 3784 |
| 2023-08-01 | click_slideChange | 301 |
| 2023-08-01 | video | 2450 |
| 2023-08-01 | visit_page | 199 |
| 2023-09-01 | click_slideChange | 109 |
| 2023-09-01 | video | 3292 |
| 2023-09-01 | visit_page | 267 |
| 2023-10-01 | click_slideChange | 159 |
| 2023-10-01 | video | 1233 |
| 2023-10-01 | visit_page | 234 |
| 2023-11-01 | click_slideChange | 98 |
| 2023-11-01 | video | 1172 |
| 2023-11-01 | visit_page | 76 |
| 2023-12-01 | click_slideChange | 884 |
| 2023-12-01 | video | 8081 |
| 2023-12-01 | visit_page | 373 |
| 2024-01-01 | click_slideChange | 7781 |
| 2024-01-01 | video | 51149 |
| 2024-01-01 | visit_page | 2830 |
| 2024-02-01 | click_slideChange | 786 |
| 2024-02-01 | video | 12419 |
| 2024-02-01 | visit_page | 448 |
| 2024-03-01 | click_slideChange | 526 |
| 2024-03-01 | video | 6388 |
| 2024-03-01 | visit_page | 924 |
| 2024-04-01 | click_slideChange | 1727 |
| 2024-04-01 | video | 17945 |
| 2024-04-01 | visit_page | 1012 |
| 2024-05-01 | click_slideChange | 3029 |
| 2024-05-01 | video | 39557 |
| 2024-05-01 | visit_page | 1987 |
| 2024-06-01 | click_slideChange | 4007 |
| 2024-06-01 | video | 31850 |
| 2024-06-01 | visit_page | 2390 |
| 2024-07-01 | click_slideChange | 4700 |
| 2024-07-01 | video | 48145 |
| 2024-07-01 | visit_page | 2320 |
| 2024-08-01 | click_slideChange | 2 |
| 2024-08-01 | video | 184 |
| 2024-08-01 | visit_page | 29 |
| 2024-09-01 | click_slideChange | 3 |
| 2024-09-01 | video | 454 |
| 2024-09-01 | visit_page | 28 |
| 2024-10-01 | click_slideChange | 107 |
| 2024-10-01 | video | 380 |
| 2024-10-01 | visit_page | 36 |
| 2024-11-01 | click_slideChange | 2380 |
| 2024-11-01 | video | 9909 |
| 2024-11-01 | visit_page | 658 |
| 2024-12-01 | video | 61655 |
| 2024-12-01 | visit_page | 3057 |
| 2025-01-01 | video | 87736 |
| 2025-01-01 | visit_page | 4055 |
| 2025-02-01 | video | 9526 |
| 2025-02-01 | visit_page | 655 |
| 2025-03-01 | video | 6 |
| 2025-03-01 | visit_page | 24 |
count_action_type_per_month_top3 |>
ggplot(aes(x = month_start, y = n, color = category, group = category)) +
geom_line()eventcategoryWas machen die Visitors eigentlich? Und wie oft?
data_slim_filtered_count <-
data_slim_filtered |>
filter(type == "eventcategory") |>
count(value, sort = TRUE)
data_slim_filtered_count |>
gt()| value | n |
|---|---|
| videoplayer_click | 646885 |
| clear_transcript_text_for_llm_context | 104108 |
| click_slideChange | 61934 |
| click_button | 20409 |
| click_transcript_word | 7882 |
| click_videocard | 7164 |
| verify_option_wrong | 4255 |
| login | 4147 |
| click_topic_position_using_image | 2229 |
| click_in_media_search_results | 1268 |
| in_media_search | 1036 |
| generate_questionaire | 1014 |
| generate_questionaire_interval_minutes | 1014 |
| click_start_resize | 975 |
| click_stop_resize | 942 |
| in_media_search_results | 890 |
| verify_option_correct | 708 |
| click_channelcard | 587 |
| click_videocard_search_lecturer | 537 |
| logout | 519 |
| select_transcript_text_for_llm_context | 468 |
| click_videocard_search_course_acronym | 402 |
| click_option | 338 |
| message_to_llm | 279 |
| message_to_llm_de | 258 |
| eval | 240 |
| llm_response_de | 230 |
| click_topic_position_using_link | 213 |
| press_enter | 211 |
| click_topic_details | 181 |
| click_toggle | 171 |
| click_videocard_search_semester | 104 |
| click_channelcard_search_course_acronym | 80 |
| userRole | 75 |
| click_videocard_search_course | 57 |
| click_channelcard_search_faculty_acronym | 33 |
| click_channelcard_search_lecturer | 33 |
| message_to_llm_en | 27 |
| llm_response_en | 26 |
| click_channelcard_search_course | 20 |
| click_channelcard_search_semester | 10 |
| click_channelcard_search_faculty | 7 |
| click_link | 5 |
| click_survey | 3 |
| click_chat_message_thumbs_down | 1 |
| click_chat_message_thumbs_up | 1 |
data_slim_filtered_count |>
ggtexttable()count_action_type |>
count(category, sort = TRUE) |>
ggplot(aes(y = reorder(category, n), x = n)) +
geom_col() +
geom_bar_text() +
labs(
x = "User-Aktion",
y = "Aktion",
title = "Anzahl der User-Aktionen nach Kategorie"
) +
theme_minimal() +
scale_x_continuous(labels = scales::comma)count_action_type |>
count(category, sort = TRUE) |>
ggplot(aes(y = reorder(category, n), x = n)) +
geom_col() +
geom_bar_text() +
labs(
x = "Anazhl der User-Aktionen",
y = "Aktion",
title = "Anzahl der User-Aktionen nach Kategorie",
caption = "Log10-Skala"
) +
theme_minimal() +
scale_x_log10()tar_load(time_visit_wday)# Define a vector with the names of the days of the week
# Note: Adjust the start of the week (Sunday or Monday) as per your requirement
days_of_week <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday")
# Replace numbers with day names
time_visit_wday$dow2 <- factor(days_of_week[time_visit_wday$dow],
levels = days_of_week)time_visit_wday |>
as_tibble() |>
count(hour) |>
mutate(prop = n/sum(n)) |>
ggplot(aes(x = hour, y = prop)) +
geom_col() +
theme_minimal() +
labs(
title = "HaNS-Nutzer sind keine Frühaufsteher",
x = "Uhrzeit",
y = "Anteil"
) # coord_polar()time_visit_wday |>
as_tibble() |>
count(hour) |>
mutate(prop = n/sum(n)) |>
ggplot(aes(x = hour, y = prop)) +
geom_col() +
theme_minimal() +
coord_polar()time_visit_wday |>
as_tibble() |>
count(dow2) |>
mutate(prop = n/sum(n)) |>
ggplot(aes(x = dow2, y = prop)) +
geom_col() +
theme_minimal() +
labs(title = "Verteilung der HaNS-Logins nach Wochentagen",
x = "Wochentag",
y = "Anteil") # coord_polar()time_visit_wday |>
as_tibble() |>
count(dow2) |>
mutate(prop = n/sum(n)) |>
ggplot(aes(x = dow2, y = prop)) +
geom_col() +
theme_minimal() +
labs(title = "Verteilung der HaNS-Logins nach Wochentagen",
x = "Wochentag",
y = "Anteil") +
coord_polar()time_visit_wday |>
as_tibble() |>
count(dow2, hour) |>
group_by(dow2) |>
mutate(prop = n/sum(n)) |>
ggplot(aes(x = hour, y = prop)) +
geom_col() +
facet_wrap(~ dow2) +
theme_minimal() +
labs(title = "Verteilung der HaNS-Logins nach Wochentagen und Uhrzeiten",
x = "Wochentag",
y = "Anteil") # coord_polar()time_visit_wday |>
as_tibble() |>
count(dow2, hour) |>
group_by(dow2) |>
mutate(prop = n/sum(n)) |>
ggplot(aes(x = hour, y = prop)) +
geom_col() +
facet_wrap(~ dow2) +
theme_minimal() +
labs(title = "Verteilung der HaNS-Logins nach Wochentagen und Uhrzeiten",
x = "Wochentag",
y = "Anteil") +
coord_polar()time2 <-
time_visit_wday |>
ungroup() |>
mutate(date = as.Date(date_time))
time2 |>
ggplot(aes(x = date, y = hour)) +
geom_bin2d(binwidth = c(1, 1)) + # (1 day, 1 hour)
scale_x_date(date_breaks = "1 month") +
theme(legend.position = "bottom") +
scale_fill_viridis_c() +
labs(caption = "Each x-bin maps to one week")time2 |>
ggplot(aes(x = date, y = hour)) +
geom_bin2d(binwidth = c(7, 1)) + # 1 week, 1 hour
scale_x_date(date_breaks = "1 week", date_labels = "%W") +
theme(legend.position = "bottom") +
scale_fill_viridis_c() +
labs(x = "Week number in 2023/2024",
caption = "Each x-bin maps to one week")time2 |>
ggplot(aes(x = date, y = dow)) +
geom_bin2d(binwidth = c(7, 1)) + # 1 week, 1 hour
scale_x_date(date_breaks = "1 week", date_labels = "%W") +
theme(legend.position = "bottom") +
scale_fill_viridis_c() +
labs(x = "Week number in 2023/2024",
caption = "Each x-bin maps to one week",
y = "Day of Week") +
scale_y_continuous(breaks = 1:7)data_slim_filtered_ai <-
data_slim_filtered |>
filter(type == "eventcategory") |>
filter(str_detect(value, "llm")) |>
count(value, sort = TRUE) |>
mutate(prop = n / sum(n))
data_slim_filtered_ai |>
gt() |>
fmt_auto()| value | n | prop |
|---|---|---|
| clear_transcript_text_for_llm_context | 104,108 | 0.988 |
| select_transcript_text_for_llm_context | 468 | 0.004 |
| message_to_llm | 279 | 0.003 |
| message_to_llm_de | 258 | 0.002 |
| llm_response_de | 230 | 0.002 |
| message_to_llm_en | 27 | 2.562 × 10−4 |
| llm_response_en | 26 | 2.467 × 10−4 |
data_slim_filtered_ai |>
mutate(prop = round(prop, 3)) |>
ggtexttable()data_slim_filtered_llm_interact <-
data_slim_filtered |>
mutate(has_llm = str_detect(value, "llm")) |>
group_by(idvisit) |>
summarise(llm_used_during_visit = any(has_llm == TRUE)) |>
count(llm_used_during_visit) |>
mutate(prop = round(n /sum(n), 2))
data_slim_filtered_llm_interact|>
gt()| llm_used_during_visit | n | prop |
|---|---|---|
| FALSE | 4286 | 0.89 |
| TRUE | 509 | 0.11 |
data_slim_filtered_llm_interact |>
ggtexttable()tar_load(idvisit_has_llm)
idvisit_has_llm_timeline <-
idvisit_has_llm |>
count(year_month, uses_llm) |>
ungroup() |>
group_by(year_month) |>
mutate(prop = round(n/sum(n), 2))
idvisit_has_llm_timeline|>
gt()| uses_llm | n | prop |
|---|---|---|
| 2022-12 | ||
| FALSE | 275 | 0.78 |
| TRUE | 79 | 0.22 |
| 2023-1 | ||
| FALSE | 406 | 0.76 |
| TRUE | 126 | 0.24 |
| 2023-10 | ||
| FALSE | 96 | 0.78 |
| TRUE | 27 | 0.22 |
| 2023-11 | ||
| FALSE | 40 | 0.82 |
| TRUE | 9 | 0.18 |
| 2023-12 | ||
| FALSE | 314 | 0.99 |
| TRUE | 3 | 0.01 |
| 2023-2 | ||
| FALSE | 554 | 0.84 |
| TRUE | 108 | 0.16 |
| 2023-3 | ||
| FALSE | 74 | 0.94 |
| TRUE | 5 | 0.06 |
| 2023-4 | ||
| FALSE | 91 | 0.69 |
| TRUE | 40 | 0.31 |
| 2023-5 | ||
| FALSE | 119 | 0.74 |
| TRUE | 41 | 0.26 |
| 2023-6 | ||
| FALSE | 67 | 0.83 |
| TRUE | 14 | 0.17 |
| 2023-7 | ||
| FALSE | 219 | 0.93 |
| TRUE | 16 | 0.07 |
| 2023-8 | ||
| FALSE | 31 | 1.00 |
| 2023-9 | ||
| FALSE | 40 | 1.00 |
| 2024-1 | ||
| FALSE | 887 | 0.95 |
| TRUE | 44 | 0.05 |
| 2024-10 | ||
| FALSE | 9 | 1.00 |
| 2024-11 | ||
| FALSE | 166 | 0.92 |
| TRUE | 15 | 0.08 |
| 2024-12 | ||
| FALSE | 794 | 0.92 |
| TRUE | 67 | 0.08 |
| 2024-2 | ||
| FALSE | 94 | 1.00 |
| 2024-3 | ||
| FALSE | 6 | 0.67 |
| TRUE | 3 | 0.33 |
| 2024-4 | ||
| FALSE | 6 | 0.33 |
| TRUE | 12 | 0.67 |
| 2024-5 | ||
| FALSE | 113 | 1.00 |
| 2024-6 | ||
| FALSE | 70 | 0.91 |
| TRUE | 7 | 0.09 |
| 2024-7 | ||
| FALSE | 23 | 1.00 |
| 2024-8 | ||
| FALSE | 1 | 1.00 |
| 2025-1 | ||
| FALSE | 1008 | 0.94 |
| TRUE | 60 | 0.06 |
| 2025-2 | ||
| FALSE | 166 | 0.96 |
| TRUE | 7 | 0.04 |
| 2025-3 | ||
| FALSE | 2 | 1.00 |
idvisit_has_llm_timeline |>
ggtexttable()idvisit_has_llm |>
count(year_month, uses_llm) |>
ungroup() |>
group_by(year_month) |>
mutate(prop = n/sum(n)) |>
ggplot(aes(x = year_month, y = prop, color = uses_llm, groups = uses_llm)) +
geom_point() +
geom_line(aes(group = uses_llm)) +
labs(title = "Visitors, die mit dem LLM interagieren im Zeitverlauf (Anteile)")idvisit_has_llm |>
count(year_month, uses_llm) |>
ungroup() |>
group_by(year_month) |>
ggplot(aes(x = year_month, y = n, color = uses_llm, groups = uses_llm)) +
geom_point() +
geom_line(aes(group = uses_llm)) +
labs(title = "Visitors, die mit dem LLM interagieren im Zeitverlauf (Anzahl)")data_slim_filtered |>
filter(type == "subtitle") |>
filter(!is.na(value) & value != "") |>
count(click_transcript_word = str_detect(value, "click_transcript_word")) |>
mutate(prop = round(n/sum(n), 2)) |>
gt()| click_transcript_word | n | prop |
|---|---|---|
| FALSE | 910840 | 0.99 |
| TRUE | 7882 | 0.01 |
tar_load(data_long)regex_pattern <- "Category: \"(.*?)(?=', Action)"
ai_actions_count <-
data_long |>
# slice(1:1000) |>
filter(str_detect(value, "transcript")) |>
mutate(category = str_extract(value, regex_pattern)) |>
select(category) |>
mutate(category = str_replace_all(category, "[\"']", "")) |>
count(category, sort = TRUE)
ai_actions_count |>
gt()| category | n |
|---|---|
| NA | 217059 |
| Category: clear_transcript_text_for_llm_context | 104108 |
| Category: click_transcript_word | 7882 |
| Category: select_transcript_text_for_llm_context | 468 |
| Category: click_button | 20 |
| Category: llm_response_de | 3 |
| Category: llm_response_en | 2 |
tar_load(ai_transcript_clicks_per_month)ai_transcript_clicks_per_month_count <-
ai_transcript_clicks_per_month |>
count(year_month, clicks_transcript_any) |>
ungroup() |>
group_by(year_month) |>
mutate(prop = round(n/sum(n), 2))
ai_transcript_clicks_per_month_count|>
gt()| clicks_transcript_any | n | prop |
|---|---|---|
| 2022-12 | ||
| FALSE | 245 | 0.69 |
| TRUE | 109 | 0.31 |
| 2023-1 | ||
| FALSE | 303 | 0.57 |
| TRUE | 229 | 0.43 |
| 2023-10 | ||
| FALSE | 98 | 0.80 |
| TRUE | 25 | 0.20 |
| 2023-11 | ||
| FALSE | 41 | 0.84 |
| TRUE | 8 | 0.16 |
| 2023-12 | ||
| FALSE | 270 | 0.85 |
| TRUE | 47 | 0.15 |
| 2023-2 | ||
| FALSE | 479 | 0.72 |
| TRUE | 183 | 0.28 |
| 2023-3 | ||
| FALSE | 58 | 0.73 |
| TRUE | 21 | 0.27 |
| 2023-4 | ||
| FALSE | 93 | 0.71 |
| TRUE | 38 | 0.29 |
| 2023-5 | ||
| FALSE | 105 | 0.66 |
| TRUE | 55 | 0.34 |
| 2023-6 | ||
| FALSE | 61 | 0.75 |
| TRUE | 20 | 0.25 |
| 2023-7 | ||
| FALSE | 150 | 0.64 |
| TRUE | 85 | 0.36 |
| 2023-8 | ||
| FALSE | 27 | 0.87 |
| TRUE | 4 | 0.13 |
| 2023-9 | ||
| FALSE | 33 | 0.82 |
| TRUE | 7 | 0.17 |
| 2024-1 | ||
| FALSE | 765 | 0.82 |
| TRUE | 166 | 0.18 |
| 2024-10 | ||
| FALSE | 9 | 1.00 |
| 2024-11 | ||
| FALSE | 163 | 0.90 |
| TRUE | 18 | 0.10 |
| 2024-12 | ||
| FALSE | 778 | 0.90 |
| TRUE | 83 | 0.10 |
| 2024-2 | ||
| FALSE | 69 | 0.73 |
| TRUE | 25 | 0.27 |
| 2024-3 | ||
| FALSE | 5 | 0.56 |
| TRUE | 4 | 0.44 |
| 2024-4 | ||
| FALSE | 16 | 0.89 |
| TRUE | 2 | 0.11 |
| 2024-5 | ||
| FALSE | 106 | 0.94 |
| TRUE | 7 | 0.06 |
| 2024-6 | ||
| FALSE | 73 | 0.95 |
| TRUE | 4 | 0.05 |
| 2024-7 | ||
| FALSE | 17 | 0.74 |
| TRUE | 6 | 0.26 |
| 2024-8 | ||
| FALSE | 1 | 1.00 |
| 2025-1 | ||
| FALSE | 982 | 0.92 |
| TRUE | 86 | 0.08 |
| 2025-2 | ||
| FALSE | 171 | 0.99 |
| TRUE | 2 | 0.01 |
| 2025-3 | ||
| FALSE | 2 | 1.00 |
ai_transcript_clicks_per_month_count |>
ggtexttable()ai_transcript_clicks_per_month_count |>
mutate(date = ymd(paste0(year_month,"-01"))) |>
ggplot(aes(x = date, y = n)) +
geom_line(group = 1) +
geom_point()Wie viel Zeit verbringen die Nutzer mit dem Betrachten von Videos (“Glotzdauer”)?
Achtung: Die Videozeit ist schwierig auszuwerten. Die Nutzer beenden keine Videos, in dem sie auf “Pause” drücken, sondern indem sie andere Aktionen durchführen. Dies ist aber analytisch schwer abzubilden.
tar_load(glotzdauer)glotzdauer |>
# we will assume that negative glotzdauer is the as positive glotzdauer:
mutate(time_diff = abs(time_diff)) |>
# without glotzdauer smaller than 10 minutes:
filter(time_diff < 60*10) |>
ggplot(aes(x = time_diff)) +
geom_histogram() +
scale_x_time() +
labs(x = "Time interval [minutes]",
caption = "Only time intervals less than 10 minutes")glotzdauer_prepped <-
glotzdauer |>
# we will assume that negative glotzdauer is the as positive glotzdauer:
mutate(time_diff_abs_sec = abs(as.numeric(time_diff, units = "secs"))) |>
# without glotzdauer smaller than 10 minutes:
filter(time_diff_abs_sec < 60*10) |>
mutate(time_diff_abs_min = time_diff_abs_sec / 60)
glotzdauer_tbl <-
glotzdauer_prepped |>
select(time_diff_abs_sec, time_diff_abs_min) |>
describe_distribution()
glotzdauer_tbl |>
kable()| Variable | Mean | SD | IQR | Min | Max | Skewness | Kurtosis | n | n_Missing |
|---|---|---|---|---|---|---|---|---|---|
| time_diff_abs_sec | 57.8050366 | 103.166468 | 55.0000000 | 0 | 597.00 | 2.74483 | 7.78949 | 3693 | 0 |
| time_diff_abs_min | 0.9634173 | 1.719441 | 0.9166667 | 0 | 9.95 | 2.74483 | 7.78949 | 3693 | 0 |
glotzdauer_tbl |>
mutate(across(where(is.numeric), ~ round(., 2))) |>
ggpubr::ggtexttable()glotzdauer_prepped_tbl <-
glotzdauer_prepped |>
mutate(first_of_month = floor_date(date, unit = "month")) |>
group_by(first_of_month) |>
summarise(time_diff_mean = mean(time_diff, na.rm = TRUE))
glotzdauer_prepped_tbl |>
kable()| first_of_month | time_diff_mean |
|---|---|
| 2022-12-01 | 22.736364 secs |
| 2023-01-01 | 28.109244 secs |
| 2023-02-01 | 15.815451 secs |
| 2023-03-01 | 47.571429 secs |
| 2023-04-01 | 44.633588 secs |
| 2023-05-01 | 46.980582 secs |
| 2023-06-01 | 52.568750 secs |
| 2023-07-01 | 35.906250 secs |
| 2023-08-01 | 109.500000 secs |
| 2023-09-01 | 23.733333 secs |
| 2023-10-01 | 23.500000 secs |
| 2023-11-01 | 27.833333 secs |
| 2023-12-01 | 8.804196 secs |
| 2024-01-01 | 46.468619 secs |
| 2024-02-01 | 18.714286 secs |
| 2024-03-01 | 16.774193 secs |
| 2024-04-01 | 30.059701 secs |
| 2024-05-01 | 43.555556 secs |
| 2024-06-01 | 53.278846 secs |
| 2024-07-01 | 55.058824 secs |
| 2024-08-01 | -24.000000 secs |
| 2024-09-01 | -9.000000 secs |
| 2024-10-01 | 36.750000 secs |
| 2024-11-01 | 47.181818 secs |
| 2024-12-01 | 60.773707 secs |
| 2025-01-01 | 52.115732 secs |
| 2025-02-01 | 14.147059 secs |
| 2025-03-01 | 5.000000 secs |
glotzdauer_prepped_tbl |>
ggplot(aes(x = first_of_month, y = time_diff_mean)) +
geom_line()